*******************************************************************************
* this do file is for: replicating Table 2 in the paper 
* data used in this file: kh_2009pa_cleaned.dta
* date: June 5, 2023
*******************************************************************************

* log file
* close any log left open by an earlier (possibly failed) run, then open a
* fresh one; -replace- lets the do-file be rerun when the log already exists
capture log close
log using "C:\Users\arthu\Dropbox\research\mte_at\codes\replication\results\kh_2009pa_id_quantities.log", replace

* global command
* data    : directory holding kh_2009pa_cleaned.dta
* merge1_1: directory where table_2.tex is written
* NOTE(review): both paths are absolute and machine-specific; edit them before
* running on another computer
global data "C:\Users\arthu\Dropbox\research\mte_at\codes\replication\data"
global merge1_1 "C:\Users\arthu\Dropbox\research\mte_at\codes\replication\results"

*******************************************************************************
* replicate Table 2: Identifiable Quantities in Kern and Hainmueller (2009)
*******************************************************************************

* load the cleaned data set
use "$data\kh_2009pa_cleaned.dta", clear

* variable lists stored as globals
global limit_cov age gender father_occ mother_occ
global communism lenin east_ger poli_pow
global communism_non lenin_kh east_ger_kh poli_pow_kh

* 9 x 2 results matrix for Table 2 (column 1: estimate, column 2: standard error)
matrix table_2_id_quant = J(9, 2, .)

* Panel A: shares of never-takers, always-takers, and compliers
* d_0 is the untreated indicator: 1 when treatment == 0, 0 when treatment == 1,
* and missing for any other value of treatment
generate d_0 = cond(treatment == 0, 1, cond(treatment == 1, 0, .))

* row 1, never-takers: mean of d_0 among iv == 1 (original note: 1.67%)
ci means d_0 if iv == 1
matrix table_2_id_quant[1, 1] = round(r(mean), 0.001)
matrix table_2_id_quant[1, 2] = round(r(se), 0.001)

* row 2, always-takers: mean of treatment among iv == 0 (original note: 32.28%)
ci means treatment if iv == 0
matrix table_2_id_quant[2, 1] = round(r(mean), 0.001)
matrix table_2_id_quant[2, 2] = round(r(se), 0.001)

* row 3, compliers: OLS slope of treatment on iv (original note: 66%)
regress treatment iv
matrix table_2_id_quant[3, 1] = round(_b[iv], 0.001)
matrix table_2_id_quant[3, 2] = round(_se[iv], 0.001)

* Panel B: propensity score, i.e. the mean of treatment within iv == z
* z = 0 fills row 4 and z = 1 fills row 5 (estimate, then standard error)
forvalues z = 0/1 {
  local row = 4 + `z'
  ci means treatment if iv == `z'
  matrix table_2_id_quant[`row', 1] = round(r(mean), 0.001)
  matrix table_2_id_quant[`row', 2] = round(r(se), 0.001)
}

* Panel C: mean of lenin in the two (treatment, iv) cells
*   row 6: treatment == 1 & iv == 0 -> E[Y(1) | D(1) = D(0) = 1] (always-takers)
*   row 7: treatment == 0 & iv == 1 -> E[Y(0) | D(1) = D(0) = 0] (never-takers)
local row = 6
foreach cell in "treatment == 1 & iv == 0" "treatment == 0 & iv == 1" {
  ci means lenin if `cell'
  matrix table_2_id_quant[`row', 1] = round(r(mean), 0.001)
  matrix table_2_id_quant[`row', 2] = round(r(se), 0.001)
  local ++row
}

* Panel D: compute E[Y(0) | C] for the outcome lenin
* ratio of two OLS slopes on iv:
*   numerator   : slope of lenin * d_0 on iv
*   denominator : slope of d_0 on iv
* NOTE(review): the two regressions share a sample only if lenin is never
* missing where d_0 and iv are observed - confirm against the cleaned data
gen yd_0 = lenin * d_0
reg yd_0 iv
scalar nomi = _b[iv]

reg d_0 iv
scalar denomi = _b[iv]

drop yd_0

* scalar() forces both names to resolve to the scalars; a bare name is read as
* a variable (abbreviations included) whenever the data set has a match
mat table_2_id_quant[8, 1] = round(scalar(nomi)/scalar(denomi), 0.001)

* Panel D: compute bootstrap s.e. for E[Y(0) | C] (outcome: lenin)
*
* se_y0_complier recomputes, on the data currently in memory, the ratio
*   (slope of lenin * d_0 on iv) / (slope of d_0 on iv)
* and returns it in r(e_y0_c). It expects the variables lenin, d_0 and iv.

* drop any earlier definition so the do-file can be rerun in the same session
capture program drop se_y0_complier
program define se_y0_complier, rclass

  * temporary names: the product variable and the two scalars cannot collide
  * with existing variables or scalars and are removed when the program exits
  tempvar yd_0
  tempname nomi denomi

  * compute E[Y(1 - T) | Z = 1] - E[Y(1 - T) | Z = 0]
  quietly gen `yd_0' = lenin * d_0
  quietly reg `yd_0' iv
  scalar `nomi' = _b[iv]

  * compute E[1 - T | Z = 1] - E[1 - T | Z = 0]
  quietly reg d_0 iv
  scalar `denomi' = _b[iv]

  * return result: e_y0_c
  return scalar e_y0_c = `nomi'/`denomi'

end

* 1000 bootstrap replications with a fixed seed; _bs_1 is the name bootstrap
* gives to the single unnamed expression r(e_y0_c)
bootstrap r(e_y0_c), reps(1000) seed(20210712): se_y0_complier

mat table_2_id_quant[8, 2] = round(_se[_bs_1], 0.001)

* Panel D: compute E[Y(1) | C] for the outcome lenin
* ratio of two OLS slopes on iv:
*   numerator   : slope of lenin * treatment on iv
*   denominator : slope of treatment on iv
* NOTE(review): the two regressions share a sample only if lenin is never
* missing where treatment and iv are observed - confirm against the cleaned data
gen yd = lenin * treatment
reg yd iv
scalar nomi = _b[iv]

reg treatment iv
scalar denomi = _b[iv]

drop yd

* scalar() forces both names to resolve to the scalars; a bare name is read as
* a variable (abbreviations included) whenever the data set has a match
mat table_2_id_quant[9, 1] = round(scalar(nomi)/scalar(denomi), 0.001)

* Panel D: compute bootstrap s.e. for E[Y(1) | C] (outcome: lenin)
*
* se_y1_complier recomputes, on the data currently in memory, the ratio
*   (slope of lenin * treatment on iv) / (slope of treatment on iv)
* and returns it in r(e_y1_c). It expects the variables lenin, treatment and iv.

* drop any earlier definition so the do-file can be rerun in the same session
capture program drop se_y1_complier
program define se_y1_complier, rclass

  * temporary names: the product variable and the two scalars cannot collide
  * with existing variables or scalars and are removed when the program exits
  tempvar yd
  tempname nomi denomi

  * compute E[YT | Z = 1] - E[YT | Z = 0]
  quietly gen `yd' = lenin * treatment
  quietly reg `yd' iv
  scalar `nomi' = _b[iv]

  * compute E[T | Z = 1] - E[T | Z = 0]
  quietly reg treatment iv
  scalar `denomi' = _b[iv]

  * return result: e_y1_c
  return scalar e_y1_c = `nomi'/`denomi'

end

* 1000 bootstrap replications with a fixed seed; _bs_1 is the name bootstrap
* gives to the single unnamed expression r(e_y1_c)
bootstrap r(e_y1_c), reps(1000) seed(20210712): se_y1_complier

mat table_2_id_quant[9, 2] = round(_se[_bs_1], 0.001)

* label the nine rows of the results matrix and print it
matrix rownames table_2_id_quant = ///
  "P[NT]" "P[AT]" "P[C]" ///
  "p(0)" "p(1)" ///
  "E[Y1AT]" "E[Y0NT]" ///
  "E[Y0C]" "E[Y1C]"

matrix list table_2_id_quant

* options shared by the on-screen table and the exported file
local table_opts statmat(table_2_id_quant) varlabels sdec(3)

* show the formatted table, then write it as a LaTeX fragment
frmttable, `table_opts'
frmttable using "$merge1_1/table_2.tex", `table_opts' tex fragment replace nocenter

****************
* close log file
****************

* closes the log opened by -log using- at the top of this do-file
log close
